Skip the next cell if `module_update.py` is already in the same folder as this notebook. Otherwise run it: it downloads that file, which in turn downloads the other modules.
# Shell escape: download module_update.py from the Yoonsen/Modules repository.
# `-s` silences curl's progress output; the `>` redirect overwrites any existing local copy.
! curl -s "https://raw.githubusercontent.com/Yoonsen/Modules/master/module_update.py" > "module_update.py"
Import the text and graph modules used for the analysis, and apply the CSS used for display.
# Helpers from the bootstrap module downloaded above.
from module_update import update, css, code_toggle, printmd
# Notebook display setup (presumably injects CSS and a show/hide-code toggle — confirm in module_update).
css()
code_toggle()
# NOTE(review): a bare `!` runs an empty shell command; it looks like a leftover — confirm and remove.
!
# Refresh the analysis modules before importing them
# (overwrite=True presumably replaces any existing local file — confirm in module_update).
update('graph_networkx_louvain', overwrite=True)
update('nbtext', overwrite=True)
import nbtext as nb
import graph_networkx_louvain as gnl
import networkx as nx
from graph_networkx_louvain import cutdown, make_collocation_graph
from collections import Counter
import warnings
# Ignore all warnings from this point on.
warnings.simplefilter("ignore")
def _plot_smoothed(frame):
    """Plot the 5-row rolling mean of *frame* as thick lines on a 15x8 figure."""
    return frame.rolling(window=5).mean().plot(figsize=(15, 8), lw=3)


# Two political terms, 1920-2010.
sample = nb.nb_ngram("demokrati, diktatur", years=(1920, 2010), smooth=1)
sample.head()
_plot_smoothed(sample);

# Four country names over the same period (rebinds `sample`).
sample = nb.nb_ngram("Tyskland, England, USA, Russland", years=(1920, 2010), smooth=1)
_plot_smoothed(sample);

# Alternative names and spellings, 1860-2010.
sample1 = nb.nb_ngram(" USA, Amerika", years=(1860, 2010), smooth=1)
_plot_smoothed(sample1);

sample2 = nb.nb_ngram("Russland, Rusland", years=(1860, 2010), smooth=1)
_plot_smoothed(sample2);

# Wildcard lookup for forms matching 'Trond*m'; the first ten rows are shown.
nb.sorted_wildcardsearch({'word':'Trond*m', 'factor':2}).head(10)

# Rebinds `sample2` to the two spellings of the city name.
sample2 = nb.nb_ngram("Trondheim, Trondhjem", years=(1860, 2010), smooth=1)
_plot_smoothed(sample2);
# Build a word graph around 'Os' (cutoff presumably prunes weak edges — confirm in nbtext.make_graph).
G = nb.make_graph('Os', cutoff = 12)
# Draw the graph, then list its communities and cliques.
gnl.show_graph(G, spread = 1.9, fontsize=12)
# NOTE(review): other calls in this file use gnl.show_communities (plural) — confirm show_community exists.
gnl.show_community(G)
gnl.show_cliques(G)
# Same steps for 'Flå', with a lower cutoff and a smaller spread.
G2 = nb.make_graph('Flå', cutoff = 10)
gnl.show_graph(G2, spread = 1.4, fontsize=12)
gnl.show_community(G2)
gnl.show_cliques(G2)
# Select a corpus via nb.get_urn (criteria presumably mean: starting 1820, spanning the
# next 100 years, at most 300 hits — confirm against nbtext.get_urn).
korpus = nb.get_urn({'year':1820, 'next': 100, 'limit':300})
# Show the first ten entries as a frame.
nb.frame(korpus[:10])
# Look up 'Trøndelag' in the first 50 entries of the corpus, rendered as HTML
# (presumably concordances with 10 words of context on each side and 5 hits — confirm).
nb.get_urnkonk('Trøndelag', {
'urns':korpus[:50],
'before': 10,
'after': 10,
'size':5
}, html=True)
# Reference word totals wrapped in a frame (30000 is presumably the number of words requested — confirm).
tot = nb.frame(nb.totals(30000))
# The return value is discarded, so this presumably normalizes `tot` in place — confirm in nbtext.
nb.normalize_corpus_dataframe(tot)
tot.head(20)
# Rebind `korpus` to a new selection starting at 1960; this replaces the earlier value.
korpus = nb.get_urn({'year':1960, 'next': 100, 'limit':300})
nb.frame(korpus[:10])
# Collocation data for 'demokrati' within that corpus, passed through the same normalization call as `tot`.
coll = nb.urn_coll('demokrati', urns=korpus)
nb.normalize_corpus_dataframe(coll)
# Score each word as coll**1.2 / tot (the reference totals) and rank by column 0, highest first.
diff = (coll**1.2/tot).sort_values(by = 0, ascending = False)
# Word cloud of the top 100 scores, rescaled so they sum to 1.
nb.cloud(diff.head(100)/diff.head(100).sum(), stretch=4)
# Same procedure for 'Tyskland', with exponent 1.5 instead of 1.2.
coll2 = nb.urn_coll('Tyskland', urns=korpus)
nb.normalize_corpus_dataframe(coll2)
diff2 = (coll2**1.5/tot).sort_values(by = 0, ascending = False)
# Keep only column 0 rows whose index entry is purely alphabetic (str.isalpha), dropping punctuation and digits.
d2 = nb.frame(diff2[0].loc[[x for x in diff2.index if x.isalpha()]])
#d2.head(20)
nb.cloud(d2.head(100)/d2.head(100).sum(), stretch=4)
# Look up books by author pattern 'fløgsta%' ('%' is presumably a wildcard — confirm).
books = nb.get_urn({'author':'fløgsta%'})
books
# Names extracted for URN 2008110504006 (presumably one of the hits above — confirm).
F = nb.navn(2008110504006)
# Removal list: nb.spurious_names(300) plus a few hand-picked capitalised non-names.
Removals = nb.spurious_names(300)
Removals = Removals + ['Hvem', 'Hva', 'Hvorfor', 'Hei', 'Okay']
Fn = nb.check_navn(F, remove=Removals)
# Network graph for the same URN, built from the filtered names.
Fg = nb.make_network_graph(2008110504006, Fn)
gnl.show_graph(Fg, spread=1.5, fontsize=12)
gnl.show_communities(Fg)
# Plot the book's word bags using the communities of Fg as the bags.
nb.plot_book_wordbags(2008110504006, gnl.community_dict(Fg)).plot(figsize=(15,8), lw=3, alpha=0.5);
# Word cloud from nb.central_betweenness_characters(Fg, 80), converted to a dict and then a frame.
nb.cloud(nb.frame(dict(nb.central_betweenness_characters(Fg, 80))), stretch=3);
# Search by author pattern 'Mykle%' and title pattern 'sang%', with year criteria 1955 / next 60.
nb.get_urn({'author':'Mykle%', 'title': 'sang%', 'year':1955, 'next':60})
# Names extracted for URN 2016020808126, then filtered with limit=10.
navn = nb.navn('2016020808126')
navn = nb.check_navn(navn, limit=10)
navn
# Network for this book; note this rebinds the notebook-level name `G`.
G = nb.make_network(2016020808126, navn)
gnl.show_graph(G)
gnl.show_communities(G)
# Use this book's own graph G for the word bags; the original passed Fg,
# which is the graph of a different book (URN 2008110504006).
nb.plot_book_wordbags(2016020808126, gnl.community_dict(G)).plot(figsize=(15,6), lw=3, alpha=0.5);